# ------------------------------------------------------------------------------
# Create variables for number of tests in the previous 12, 24, and 48 hrs
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bash 08_tests_in_k_hours.sh
# ------------------------------------------------------------------------------

# Libraries --------------------------------------------------------------------
library(here)
library(yaml)
library(data.table)
library(tidyverse)
library(testit) # assert()
library(glue) # glue strings

# Empty string kept as a script-level variable.
# NOTE(review): presumably interpolated by the glue() call that builds the
# cohort path below -- confirm against lib/filepaths.yml before removing.
overnight_lab <- ""

# Directory that receives this script's output file.
temp <- here::here("code", "05_natural_experiment", "temp")

# Project utility module; not referenced again in this script.
u <- modules::use(here::here("lib", "util.R"))

# Load Data --------------------------------------------------------------------
message("Loading data...")
paths <- read_yaml(here::here("lib", "filepaths.yml"))

# Read the full analysis cohort; the path string from the YAML config is passed
# through glue() before being handed to readRDS().
cohort <- readRDS(glue(paths$analysis$full_cohort))

# Encounters with a missing start time fall back to `t0`, read as an EST
# datetime; all other rows keep their recorded start time.
cohort <- mutate(
  cohort,
  start_datetime = case_when(
    is.na(start_datetime) ~ lubridate::as_datetime(t0, tz = "EST"),
    TRUE ~ start_datetime
  )
)

# Put encounters in chronological order.
cohort <- cohort[order(cohort$start_datetime), ]

# Start times of the encounters flagged by `test_010_day`, in the same
# chronological order as the cohort.
test_times <- cohort %>%
  filter(test_010_day) %>%
  pull(start_datetime)

# Number of flagged encounters for each value of `t0`.
tests_per_day <- cohort %>%
  group_by(t0) %>%
  summarize(num_tests = sum(test_010_day)) %>%
  ungroup()

message("Summary of num tests by day: ")
print(summary(tests_per_day$num_tests))

# Drop the "val" split only now: `test_times` and `tests_per_day` above were
# computed on the unfiltered cohort and therefore still include those rows.
cohort <- cohort %>%
  filter(split != "val")

# Tests in K hours -------------------------------------------------------------
message("Building variable for num tests in previous K hours...")
k_list <- c(12, 24, 48)
k_varnames <- c()

# For every encounter, count the tests whose time t satisfies
#   t < enc_time   and   t + K hours >= enc_time,
# i.e. tests in the K hours leading up to (but not at) the encounter.
#
# The count is the difference of two cumulative counts over the sorted test
# times, each obtained with findInterval(left.open = TRUE), which returns the
# number of elements of the sorted vector that are strictly below each query.
# Unlike a capped two-pointer scan, this also counts the final test for
# encounters that occur after it, and it handles an empty set of test times.
#
# Times are compared as raw epoch seconds. sort() also drops NA test times,
# which cannot be placed in any window. An encounter with an NA time gets an
# NA count.
enc_secs <- as.numeric(cohort$start_datetime)
test_secs <- sort(as.numeric(test_times))

# Number of tests strictly before each encounter; does not depend on K.
n_before_enc <- findInterval(enc_secs, test_secs, left.open = TRUE)

for (k in k_list) {
  message("k = ", k)
  k_secs <- k * 60 * 60

  # Number of tests that are already more than K hours old at each encounter
  # (test time + K hours strictly before the encounter time).
  n_before_window <- findInterval(
    enc_secs,
    test_secs + k_secs,
    left.open = TRUE
  )

  # Stored as double to match the column type produced previously.
  num_tests <- as.numeric(n_before_enc - n_before_window)

  assert("Num tests same length as cohort", length(num_tests) == nrow(cohort))
  assert("Num tests non-negative", all(num_tests >= 0, na.rm = TRUE))

  prev_k_varname <- as.character(glue("ntests_in_prev_{k}"))
  k_varnames <- c(k_varnames, prev_k_varname)
  cohort[[prev_k_varname]] <- num_tests

  message("Summary of tests in previous ", k, " hours: ")
  print(summary(cohort[[prev_k_varname]]))
}

# Save -------------------------------------------------------------------------
message("Saving...")

# Keep only the encounter id and the window-count columns named in
# `k_varnames`, then write them to the temp directory as an RDS file.
keep_cols <- c("ed_enc_id", k_varnames)
save_cohort <- select(cohort, all_of(keep_cols))
out_path <- file.path(temp, "tests_in_k_hours_df.rds")
write_rds(save_cohort, out_path)

message("Done.")
